Use output from Humann2 to assess the gene families, pathway abundance, and pathway coverage, assigned to Gardnerella vaginalis across samples.
library(tidyverse)
## ── Attaching packages ────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.1.1 ✔ purrr 0.3.2
## ✔ tibble 2.1.1 ✔ dplyr 0.8.0.1
## ✔ tidyr 0.8.3 ✔ stringr 1.4.0
## ✔ readr 1.3.1 ✔ forcats 0.4.0
## ── Conflicts ───────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(ggrepel)
gardRelativeAbundance <- read_delim("/Users/hlberman/Documents/GitHub/pregnancy_metagenome/gard_mapping/clade_assignments/gardRelativeAbundance.tsv", delim = "\t") %>%
mutate(SampleID=as.character(SampleID),
Clade=as.character(Clade))
## Parsed with column specification:
## cols(
## SampleID = col_double(),
## Clade = col_double(),
## n = col_double(),
## propClade = col_double()
## )
sgDF <- read_delim("/Users/hlberman/Documents/GitHub/pregnancy_metagenome/shotgunMetadata.tsv", delim = "\t")%>%
mutate(SampleID=as.character(SampleID))
## Parsed with column specification:
## cols(
## .default = col_double(),
## SampleIndex = col_character(),
## SampleType = col_character(),
## TermPre = col_character(),
## Operator = col_character(),
## PctTrim = col_character(),
## RNAlater = col_character(),
## Cohort = col_character()
## )
## See spec(...) for full column specifications.
load("/Users/hlberman/Documents/GitHub/pregnancy_metagenome/methods_comparisons/PS_RData/metaphlan_PS.RData")
# define file paths
dataDir <- "/Volumes/GoogleDrive/My Drive/Callahan Lab/vagMicrobiomeMetagenome/humann2/out_tables"
GF_path <- file.path(dataDir, "VM_genefamilies_names_cpm.tsv")
GFP_path <- file.path(dataDir, "VM_genefamilies_paths_cpm.tsv")
GOnames_path <- file.path(dataDir, "map_go_name.txt")
GFGO_path <- file.path(dataDir, "map_go_uniref90.txt")
PWnames_path <- file.path(dataDir, "map_metacyc-pwy_name.txt")
PC_path <- file.path(dataDir, "VM_pathcoverage.tsv")
PA_path <- file.path(dataDir, "VM_pathabundance_cpm.tsv")
# read in data frames
GF <- read_delim(GF_path, delim = "\t") # Gene family abundances
## Parsed with column specification:
## cols(
## .default = col_double(),
## `# Gene Family` = col_character()
## )
## See spec(...) for full column specifications.
GFP <- read_delim(GFP_path, delim = "\t") # pathways for each gene family
## Parsed with column specification:
## cols(
## .default = col_character(),
## `1000801248_Abundance` = col_double()
## )
## See spec(...) for full column specifications.
## Warning: 1662 parsing failures.
## row col expected actual file
## 1 -- 108 columns 2 columns '/Volumes/GoogleDrive/My Drive/Callahan Lab/vagMicrobiomeMetagenome/humann2/out_tables/VM_genefamilies_paths_cpm.tsv'
## 2 -- 108 columns 2 columns '/Volumes/GoogleDrive/My Drive/Callahan Lab/vagMicrobiomeMetagenome/humann2/out_tables/VM_genefamilies_paths_cpm.tsv'
## 3 -- 108 columns 2 columns '/Volumes/GoogleDrive/My Drive/Callahan Lab/vagMicrobiomeMetagenome/humann2/out_tables/VM_genefamilies_paths_cpm.tsv'
## 4 -- 108 columns 2 columns '/Volumes/GoogleDrive/My Drive/Callahan Lab/vagMicrobiomeMetagenome/humann2/out_tables/VM_genefamilies_paths_cpm.tsv'
## 5 -- 108 columns 2 columns '/Volumes/GoogleDrive/My Drive/Callahan Lab/vagMicrobiomeMetagenome/humann2/out_tables/VM_genefamilies_paths_cpm.tsv'
## ... ... ........... ......... .....................................................................................................................
## See problems(...) for more details.
PWnames <- read_delim(PWnames_path, delim = "\t", col_names = c("Pathway", "pathwayAnnotation"))
## Parsed with column specification:
## cols(
## Pathway = col_character(),
## pathwayAnnotation = col_character()
## )
PC <- read_delim(PC_path, delim = "\t") # pathway coverage
## Parsed with column specification:
## cols(
## .default = col_double(),
## `# Pathway` = col_character()
## )
## See spec(...) for full column specifications.
PA <- read_delim(PA_path, delim = "\t") # pathway abundnance
## Parsed with column specification:
## cols(
## .default = col_double(),
## `# Pathway` = col_character()
## )
## See spec(...) for full column specifications.
GOnames <- read_delim(GOnames_path, delim = "\t", col_names = c("GO", "Function")) # names and annotations of GO categories
## Parsed with column specification:
## cols(
## GO = col_character(),
## Function = col_character()
## )
GFGO <- read_lines(GFGO_path) %>% # read in the file as a list
str_split("\t")
GFP0 <- GFP %>%
select(`# Pathway`) %>% # keep only pathway and gene family names
filter(str_detect(`# Pathway`, "Gardnerella")) %>% # keep only Gardnerella
separate(`# Pathway`, c("Pathway", "GeneFamily"), sep ="\\|g__Gardnerella.s__Gardnerella_vaginalis\\|", fill = "right") %>% # separate
separate(GeneFamily, c("uniref", "unirefAnnotation"), ": ") %>%
filter(uniref != "") # remove empty pathways
head(GFP0)
## # A tibble: 6 x 3
## Pathway uniref unirefAnnotation
## <chr> <chr> <chr>
## 1 COA-PWY-1 UniRef90_I4LNX4 Dephospho-CoA kinase
## 2 COA-PWY-1 UniRef90_D2R9V1 Phosphopantetheine adenylyltransferase
## 3 COA-PWY-1 UniRef90_D2RAK6 Dephospho-CoA kinase
## 4 COA-PWY-1 UniRef90_D2RBT1 Type III pantothenate kinase
## 5 NONOXIPENT-PWY UniRef90_E3D9X8 Transaldolase
## 6 NONOXIPENT-PWY UniRef90_D6SZM2 Transaldolase
GOcategories <- map(GFGO, `[[`, 1) %>% # get the GO category labels
unlist()
GFGO0 <- map2(GOcategories, GFGO, ~paste(.x, .y, sep=",")) %>% # paste a separable (important for later making a dataframe) GO category to each gene family in each category
unlist() %>% # collapse list
as_tibble() %>% # make dataframe
separate(1, c("GO", "uniref"), ",") %>% #separate into columns
filter(!str_detect(uniref, "GO:[0-9]*$")) # remove rows with GO category label in uniref column
## Warning: Calling `as_tibble()` on a vector is discouraged, because the behavior is likely to change in the future. Use `tibble::enframe(name = NULL)` instead.
## This warning is displayed once per session.
head(GFGO0)
## # A tibble: 6 x 2
## GO uniref
## <chr> <chr>
## 1 GO:0000001 UniRef90_A0A016PS51
## 2 GO:0000001 UniRef90_A1C8D3
## 3 GO:0000001 UniRef90_A1CBR9
## 4 GO:0000001 UniRef90_A1CE31
## 5 GO:0000001 UniRef90_A1CNY1
## 6 GO:0000001 UniRef90_A1CPB1
GF0 <- GF %>%
filter(str_detect(`# Gene Family`, "Gardnerella")) %>%
as.data.frame()
rownames(GF0) <- GF0[,1]
GF1 <- GF0 %>%
select(-`# Gene Family`)
GF2 <- GF1 %>%
t() %>%
as.data.frame(strinsAsFactors=FALSE) %>%
mutate(SampleID=str_extract(colnames(GF1), "[0-9]{10}")) %>%
gather("GeneFamily", "Abundance", contains("Gardnerella")) %>%
mutate(GeneFamily=str_extract(GeneFamily, ".*(?=.g__Gardnerella)")) %>%
separate(GeneFamily, c("uniref", "unirefAnnotation"), ": ", fill = "right") %>%
left_join(sgDF[,c("SampleID", "SubjectID", "TermPre", "GWdell", "GWcoll", "Cohort")], by="SampleID")
head(GF2)
## SampleID uniref unirefAnnotation Abundance SubjectID TermPre
## 1 1000801248 UniRef90_unknown <NA> 293.681 10008 T
## 2 1000801318 UniRef90_unknown <NA> 168.545 10008 T
## 3 1000801368 UniRef90_unknown <NA> 262.059 10008 T
## 4 1001301158 UniRef90_unknown <NA> 5946.180 10013 P
## 5 1001301248 UniRef90_unknown <NA> 11876.100 10013 P
## 6 1001301318 UniRef90_unknown <NA> 807.692 10013 P
## GWdell GWcoll Cohort
## 1 41 25 Stanford
## 2 41 32 Stanford
## 3 41 37 Stanford
## 4 35 16 Stanford
## 5 35 25 Stanford
## 6 35 31 Stanford
PA0 <- PA %>%
filter(str_detect(`# Pathway`, "Gardnerella")) %>%
as.data.frame()
rownames(PA0) <- PA0[,1]
PA1 <- PA0 %>%
select(-`# Pathway`)
PA2 <- PA1 %>%
t() %>%
as.data.frame(strinsAsFactors=FALSE) %>%
mutate(SampleID=str_extract(colnames(PA1), "[0-9]{10}")) %>%
gather("Pathway", "Abundance", contains("Gardnerella")) %>%
mutate(Pathway=str_extract(Pathway, ".*(?=.g__Gardnerella)")) %>%
left_join(sgDF[,c("SampleID", "SubjectID", "TermPre", "GWdell", "GWcoll", "Cohort")], by="SampleID")
head(PA2)
## SampleID Pathway Abundance SubjectID TermPre GWdell GWcoll
## 1 1000801248 UNINTEGRATED 5507.21 10008 T 41 25
## 2 1000801318 UNINTEGRATED 3295.92 10008 T 41 32
## 3 1000801368 UNINTEGRATED 4797.42 10008 T 41 37
## 4 1001301158 UNINTEGRATED 73881.90 10013 P 35 16
## 5 1001301248 UNINTEGRATED 174983.00 10013 P 35 25
## 6 1001301318 UNINTEGRATED 15372.60 10013 P 35 31
## Cohort
## 1 Stanford
## 2 Stanford
## 3 Stanford
## 4 Stanford
## 5 Stanford
## 6 Stanford
PC0 <- PC %>%
filter(str_detect(`# Pathway`, "Gardnerella")) %>%
as.data.frame()
rownames(PC0) <- PC0[,1]
PC1 <- PC0 %>%
select(-`# Pathway`)
PC2 <- PC1 %>%
t() %>%
as.data.frame(strinsAsFactors=FALSE) %>%
mutate(SampleID=str_extract(colnames(PC1), "[0-9]{10}")) %>%
gather("Pathway", "Coverage", contains("Gardnerella")) %>%
mutate(Pathway=str_extract(Pathway, ".*(?=.g__Gardnerella)")) %>%
left_join(sgDF[,c("SampleID", "SubjectID", "TermPre", "GWdell", "GWcoll", "Cohort")], by="SampleID")
head(PC2)
## SampleID Pathway Coverage SubjectID TermPre GWdell GWcoll
## 1 1000801248 UNINTEGRATED 1 10008 T 41 25
## 2 1000801318 UNINTEGRATED 1 10008 T 41 32
## 3 1000801368 UNINTEGRATED 1 10008 T 41 37
## 4 1001301158 UNINTEGRATED 1 10013 P 35 16
## 5 1001301248 UNINTEGRATED 1 10013 P 35 25
## 6 1001301318 UNINTEGRATED 1 10013 P 35 31
## Cohort
## 1 Stanford
## 2 Stanford
## 3 Stanford
## 4 Stanford
## 5 Stanford
## 6 Stanford
geneFamilies <- unique(paste(GF2$uniref, GF2$unirefAnnotation, sep = ": "))
GFfoldChange <- GF2 %>%
group_by(uniref, unirefAnnotation, TermPre, Cohort) %>%
summarise(mean(Abundance)) %>%
ungroup() %>%
spread(TermPre, `mean(Abundance)`) %>%
filter(Cohort=="UAB"|Cohort=="Stanford",
`T`>0,
P>0) %>%
mutate(FoldChange=case_when(P>`T`~P/`T`,
`T`>P~-`T`/P)) #%>%
# left_join(GFGO0, by="uniref") %>% # for adding GO information to the dataframe. removed for now as multiple GO categories were assigned to many uniref
# left_join(GOnames, by= "GO")
labels1 <- GFfoldChange[order(GFfoldChange$FoldChange),] %>%
select(uniref, unirefAnnotation, Cohort, FoldChange)
labels1 <- labels1[1:20,]
labels2 <- GFfoldChange[order(-GFfoldChange$FoldChange),] %>%
select(uniref, unirefAnnotation, Cohort, FoldChange)
labels2 <- labels2[1:10,]
labels <- rbind(labels1, labels2) %>%
filter(unirefAnnotation!="NO_NAME")
ggplot(GFfoldChange, aes(x=uniref, y=FoldChange, label = unirefAnnotation)) +
geom_point() +
theme(axis.text.x = element_blank()) +
ggrepel::geom_text_repel(data=labels) +
labs(y=element_blank()) +
facet_wrap(~Cohort) +
annotate("text", label = "greater in PTB", x=550, y = 500) +
annotate("text", label = "greater in TB", x=500, y = -500)
## Gene Families with Pathway information
GFfoldChangePath <- GFfoldChange %>%
left_join(GFP0, by=c("uniref", "unirefAnnotation")) %>%
filter(!is.na(Pathway)) %>%
left_join(PWnames, by="Pathway")
ggplot(GFfoldChangePath, aes(x=uniref, y=FoldChange, color = paste(Pathway, pathwayAnnotation))) +
geom_point() +
theme(axis.text.x = element_blank()) +
labs(y=element_blank(), color="Pathway") +
facet_wrap(~Cohort) +
annotate("text", label = "greater in PTB", x=7.5, y = 0.5, size = 2) +
annotate("text", label = "greater in TB", x= 7, y = -0.5, size = 2 )
Of uniref 90 gene families had pathway annotations, greatest fold increase in PTB is Threonine synthase of the superpathway of L-threonine biosynthesis (5.4 fold) in the UAB cohort and Aspartate carbamoyltransferase of the UMP biosynthesis pathway (2.6 fold) in the Stanford cohort. greatest fold increase in TB, is Dephospho-CoA kinase in the coenzyme A biosynthesis II pathway in both cohorts, with a 19.6 fold increase in the Stanford cohort and 17.901915 fold increase in the UAB cohort. Based on these results, I may need to look at another pathway database (perhaps Kegg) or consider a custom set of references.
List of greatest fold change uniref gene families (pathways not annotated for greatest fold change uniref genes)
GFfoldChange %>%
filter(FoldChange<=-2000|FoldChange>500) %>%
print(n = nrow(.), width = getOption("tibble.width"), n_extra = NULL)
## # A tibble: 16 x 6
## uniref unirefAnnotation Cohort P T FoldChange
## <chr> <chr> <chr> <dbl> <dbl> <dbl>
## 1 UniRef90… NO_NAME Stanf… 0.00658 1.76e+1 -2671.
## 2 UniRef90… NO_NAME Stanf… 0.00860 1.79e+1 -2079.
## 3 UniRef90… NO_NAME Stanf… 0.00614 1.94e+1 -3159.
## 4 UniRef90… NO_NAME Stanf… 0.00485 1.94e+1 -4001.
## 5 UniRef90… NO_NAME Stanf… 0.00797 1.90e+1 -2382.
## 6 UniRef90… N-acetylmuramoyl-L-alanine … UAB 6.86 4.40e-3 1558.
## 7 UniRef90… 2-C-methyl-D-erythritol 4-p… UAB 2.92 5.08e-3 575.
## 8 UniRef90… NO_NAME UAB 2.79 2.99e-3 935.
## 9 UniRef90… NO_NAME UAB 3.73 5.06e-3 737.
## 10 UniRef90… Putative virion core protei… UAB 3.55 3.40e-3 1046.
## 11 UniRef90… NO_NAME UAB 3.06 4.94e-3 619.
## 12 UniRef90… NO_NAME UAB 2.87 3.43e-3 837.
## 13 UniRef90… Modification methyltransfer… UAB 0.00102 2.07e+0 -2036.
## 14 UniRef90… Putative surface-anchored p… UAB 2.15 4.03e-3 533.
## 15 UniRef90… NO_NAME Stanf… 0.00204 5.50e+0 -2695.
## 16 UniRef90… Repeat protein Stanf… 1.08 1.09e-3 997.
alternatePresence <- GF2 %>%
group_by(uniref, unirefAnnotation, TermPre, Cohort) %>%
summarise(mean(Abundance)) %>%
ungroup() %>%
spread(TermPre, `mean(Abundance)`) %>%
filter(Cohort=="UAB"|Cohort=="Stanford",
`T`==0|P==0,
!(`T`==0&P==0)) %>%
mutate(`T`=-`T`) %>%
gather("TermPre", "Abundance", c(`T`, P)) %>% #make abundance one column instead of separate T and P columsn
filter(Abundance!=0) %>% # get rid of duplicate entries where abundance = 0
left_join(GFP0, by=c("uniref", "unirefAnnotation")) %>%
left_join(PWnames, by="Pathway")
ggplot(alternatePresence, aes(x=reorder(uniref, Abundance), y=Abundance))+
geom_col() +
theme(axis.text.x = element_blank()) +
labs(x="Uniref90", y="Average reads per million") +
facet_wrap(~Cohort) +
annotate("text", label = "greater in PTB", x=300, y = 25) +
annotate("text", label = "greater in TB", x=300, y = -50)
# remove bulk of the figure with little variation
alternatePresence %>%
filter(Abundance>=10|Abundance<=10) %>%
ggplot(aes(x=reorder(uniref, Abundance), y=Abundance)) +
geom_col() +
theme(axis.text.x = element_blank()) +
labs(x="Uniref90", y="Average reads per million") +
facet_wrap(~Cohort) +
annotate("text", label = "greater in PTB", x=300, y = 25) +
annotate("text", label = "greater in TB", x=300, y = -50)
Save list of alternately present genes and show list of greatest differences
alternatePresence %>%
filter(Abundance>=10|Abundance<=-20) %>%
arrange(-Abundance) %>%
print(n = nrow(.), width = getOption("tibble.width"), n_extra = NULL)
## # A tibble: 33 x 7
## uniref unirefAnnotation Cohort TermPre Abundance Pathway
## <chr> <chr> <chr> <chr> <dbl> <chr>
## 1 UniRe… NO_NAME UAB P 53.7 <NA>
## 2 UniRe… NO_NAME Stanf… P 25.4 <NA>
## 3 UniRe… NO_NAME Stanf… P 17.0 <NA>
## 4 UniRe… NO_NAME UAB P 16.1 <NA>
## 5 UniRe… NO_NAME Stanf… P 13.5 <NA>
## 6 UniRe… NO_NAME UAB P 12.6 <NA>
## 7 UniRe… NO_NAME UAB P 11.7 <NA>
## 8 UniRe… NO_NAME UAB P 11.0 <NA>
## 9 UniRe… NO_NAME UAB P 10.5 <NA>
## 10 UniRe… NO_NAME UAB P 10.2 <NA>
## 11 UniRe… NO_NAME Stanf… T -20.6 <NA>
## 12 UniRe… NO_NAME Stanf… T -24.9 <NA>
## 13 UniRe… NO_NAME UAB T -25.4 <NA>
## 14 UniRe… NO_NAME UAB T -25.5 <NA>
## 15 UniRe… NO_NAME Stanf… T -25.8 <NA>
## 16 UniRe… NO_NAME Stanf… T -27.9 <NA>
## 17 UniRe… Hypothetical me… UAB T -28.5 <NA>
## 18 UniRe… NO_NAME Stanf… T -31.0 <NA>
## 19 UniRe… Histidine kinase UAB T -33.5 <NA>
## 20 UniRe… NO_NAME Stanf… T -34.2 <NA>
## 21 UniRe… NO_NAME UAB T -34.3 <NA>
## 22 UniRe… NO_NAME UAB T -34.6 <NA>
## 23 UniRe… Response regula… UAB T -35.0 <NA>
## 24 UniRe… Putative nucleo… UAB T -35.2 <NA>
## 25 UniRe… NO_NAME UAB T -39.9 <NA>
## 26 UniRe… NO_NAME Stanf… T -42.4 <NA>
## 27 UniRe… NO_NAME UAB T -43.4 <NA>
## 28 UniRe… NO_NAME Stanf… T -50.2 <NA>
## 29 UniRe… NO_NAME UAB T -90.1 <NA>
## 30 UniRe… NO_NAME UAB T -113. <NA>
## 31 UniRe… NO_NAME Stanf… T -116. <NA>
## 32 UniRe… NO_NAME UAB T -122. <NA>
## 33 UniRe… NO_NAME Stanf… T -225. <NA>
## # … with 1 more variable: pathwayAnnotation <chr>
Need to continure to assess gene function and pathway, and also look at distribution across samples of each of these genes.
Assess reads per million in each annotated pathway (MetaCyc) from Gardnerella vaginalis sample across in term versus preterm birth. Cumulative reads in these pathways attributed to Gardnerella vaginalis determined by Humann2.
pathways <- unique(PA2$Pathway)
pathways
## [1] "UNINTEGRATED"
## [2] "COA-PWY-1: coenzyme A biosynthesis II (mammalian)"
## [3] "NONOXIPENT-PWY: pentose phosphate pathway (non-oxidative branch)"
## [4] "PENTOSE-P-PWY: pentose phosphate pathway"
## [5] "PEPTIDOGLYCANSYN-PWY: peptidoglycan biosynthesis I (meso-diaminopimelate containing)"
## [6] "PWY-5100: pyruvate fermentation to acetate and lactate II"
## [7] "PWY-5686: UMP biosynthesis"
## [8] "PWY-6122: 5-aminoimidazole ribonucleotide biosynthesis II"
## [9] "PWY-6151: S-adenosyl-L-methionine cycle I"
## [10] "PWY-6277: superpathway of 5-aminoimidazole ribonucleotide biosynthesis"
## [11] "PWY-6385: peptidoglycan biosynthesis III (mycobacteria)"
## [12] "PWY-6386: UDP-N-acetylmuramoyl-pentapeptide biosynthesis II (lysine-containing)"
## [13] "PWY-6387: UDP-N-acetylmuramoyl-pentapeptide biosynthesis I (meso-diaminopimelate containing)"
## [14] "PWY-7219: adenosine ribonucleotides de novo biosynthesis"
## [15] "PWY-7221: guanosine ribonucleotides de novo biosynthesis"
## [16] "THRESYN-PWY: superpathway of L-threonine biosynthesis"
map(pathways, ~filter(PA2, Pathway==.x) %>%
ggplot(., aes(x=TermPre, y=Abundance, color=TermPre)) +
geom_jitter() +
geom_boxplot(alpha=0) +
scale_y_log10() +
labs(title=.x, x=element_blank(), y="Abundance (reads per million") +
scale_x_discrete(labels = c("Preterm", "Term")) +
scale_color_manual(values=c("#56B4E9", "#D55E00")) +
theme(legend.position = "none") +
facet_wrap(~Cohort))
## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
##
## [[5]]
##
## [[6]]
##
## [[7]]
##
## [[8]]
##
## [[9]]
##
## [[10]]
##
## [[11]]
##
## [[12]]
##
## [[13]]
##
## [[14]]
##
## [[15]]
##
## [[16]]
Need to find cohort information of samples of unkown cohort. Some differences apparent between Stanford and UAB cohorts. of note is that no PTB samples have reads from the peptidoglycan biosynthesis III pathway in the Stanford cohort and only 3 Stanford term samples had reads from this pathway.
PC3 <- PC2 %>%
filter(Pathway!="UNINTEGRATED") # covereage value of "unintegrated" is meaningless
map(pathways[2:length(pathways)], ~filter(PC3, Pathway==.x) %>% # remove unintegrated from pathways list.
ggplot(., aes(x=TermPre, y=Coverage, color=TermPre)) +
geom_jitter() +
geom_boxplot(alpha=0) +
labs(title=.x, x=element_blank()) +
scale_x_discrete(labels = c("Preterm", "Term")) +
ylim(0,1) +
scale_color_manual(values=c("#56B4E9", "#D55E00")) +
theme(legend.position = "none") +
facet_wrap(~Cohort))
## [[1]]
## Warning: Removed 23 rows containing missing values (geom_point).
##
## [[2]]
## Warning: Removed 47 rows containing missing values (geom_point).
##
## [[3]]
## Warning: Removed 43 rows containing missing values (geom_point).
##
## [[4]]
## Warning: Removed 27 rows containing missing values (geom_point).
##
## [[5]]
## Warning: Removed 20 rows containing missing values (geom_point).
##
## [[6]]
## Warning: Removed 12 rows containing missing values (geom_point).
##
## [[7]]
## Warning: Removed 9 rows containing missing values (geom_point).
##
## [[8]]
## Warning: Removed 15 rows containing missing values (geom_point).
##
## [[9]]
## Warning: Removed 17 rows containing missing values (geom_point).
##
## [[10]]
## Warning: Removed 53 rows containing missing values (geom_point).
##
## [[11]]
## Warning: Removed 24 rows containing missing values (geom_point).
##
## [[12]]
## Warning: Removed 19 rows containing missing values (geom_point).
##
## [[13]]
## Warning: Removed 17 rows containing missing values (geom_point).
##
## [[14]]
## Warning: Removed 17 rows containing missing values (geom_point).
##
## [[15]]
## Warning: Removed 23 rows containing missing values (geom_point).
Coverage values vary from 0 to 1 and are a measure of the certainty that the pathway is present in the sample. Note that all coverage values for PWY-6385: peptidoglycan biosynthesis III (mycobacteria) were 0, jittering is causing it to appear that some values were greater than 0.
-Try assessing pathway information with KEGG pathways, since most gene families are unable to be mapped to a metacyc pathway (with provided Humann2 database), or consider a curated database. -Asses disrtibution across samples of certain gene familes. -Compare against strains that are present in each sample!!!
sessionInfo()
## R version 3.5.2 (2018-12-20)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS High Sierra 10.13.6
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] ggrepel_0.8.1 forcats_0.4.0 stringr_1.4.0 dplyr_0.8.0.1
## [5] purrr_0.3.2 readr_1.3.1 tidyr_0.8.3 tibble_2.1.1
## [9] ggplot2_3.1.1 tidyverse_1.2.1
##
## loaded via a namespace (and not attached):
## [1] tidyselect_0.2.5 xfun_0.6 haven_2.1.0 lattice_0.20-38
## [5] colorspace_1.4-1 generics_0.0.2 htmltools_0.3.6 yaml_2.2.0
## [9] utf8_1.1.4 rlang_0.3.4 pillar_1.3.1 glue_1.3.1
## [13] withr_2.1.2 modelr_0.1.4 readxl_1.3.1 plyr_1.8.4
## [17] munsell_0.5.0 gtable_0.3.0 cellranger_1.1.0 rvest_0.3.3
## [21] evaluate_0.13 labeling_0.3 knitr_1.22 fansi_0.4.0
## [25] broom_0.5.2 Rcpp_1.0.1 scales_1.0.0 backports_1.1.4
## [29] jsonlite_1.6 hms_0.4.2 digest_0.6.18 stringi_1.4.3
## [33] grid_3.5.2 cli_1.1.0 tools_3.5.2 magrittr_1.5
## [37] lazyeval_0.2.2 crayon_1.3.4 pkgconfig_2.0.2 xml2_1.2.0
## [41] lubridate_1.7.4 assertthat_0.2.1 rmarkdown_1.12 httr_1.4.0
## [45] rstudioapi_0.10 R6_2.4.0 nlme_3.1-139 compiler_3.5.2